In [ ]:
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Prerequisites:
1. Familiarity with Python
2. Completed Chapter 4: Mobile Convolutional Networks
Objectives:
1. Use a metaparameter (width multiplier) to thin a MobileNet v1.
2. Code a mobile convolutional network style classifier.
3. Code a SqueezeNet fire block.
4. Quantize a mobile convolutional network.
In [2]:
!pip install opencv-python
Let's start with a MobileNet v1 coded using the procedural reuse design pattern.
You will need to:
1. Set the thinning factor (width multiplier) at various locations in the code.
2. Set the max value for ReLU, which clips activation values above that maximum.
3. Calculate the number of thinned filters in the mobilenet blocks.
In [ ]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import ZeroPadding2D, Conv2D, BatchNormalization, ReLU
from tensorflow.keras.layers import DepthwiseConv2D, GlobalAveragePooling2D, Reshape, Dropout
def stem(inputs, alpha):
""" Construct the Stem Group
inputs : input tensor
alpha : width multiplier
"""
# Convolutional block
# Replace the ?? by the thinning factor
# HINT: reduce the number of filters (32) by the thinning factor
x = ZeroPadding2D(padding=((0, 1), (0, 1)))(inputs)
x = Conv2D(32 * ??, (3, 3), strides=(2, 2), padding='valid')(x)
x = BatchNormalization()(x)
# Replace the max value to clip
# HINT: the best value found by the authors
x = ReLU(??)(x)
# Depthwise Separable Convolution Block
x = depthwise_block(x, 64, alpha, (1, 1))
return x
# Replace the ?? with the parameter for thinning
# HINT: the same parameter name as in the stem()
def learner(x, ??):
""" Construct the Learner
x : input to the learner
alpha : width multiplier
"""
# First Depthwise Separable Convolution Group
x = group(x, 128, 2, alpha)
# Second Depthwise Separable Convolution Group
x = group(x, 256, 2, alpha)
# Third Depthwise Separable Convolution Group
x = group(x, 512, 6, alpha)
# Fourth Depthwise Separable Convolution Group
x = group(x, 1024, 2, alpha)
return x
def group(x, n_filters, n_blocks, alpha):
""" Construct a Depthwise Separable Convolution Group
x : input to the group
n_filters : number of filters
n_blocks : number of blocks in the group
alpha : width multiplier
"""
# In the first block, the depthwise convolution is strided for feature map size reduction
# Replace the ?? with the thinning factor
# HINT: the name of the parameter passed to this function
x = depthwise_block(x, n_filters, ??, strides=(2, 2))
# Remaining blocks
for _ in range(n_blocks - 1):
x = depthwise_block(x, n_filters, alpha, strides=(1, 1))
return x
def depthwise_block(x, n_filters, alpha, strides):
""" Construct a Depthwise Separable Convolution block
x : input to the block
n_filters : number of filters
alpha : width multiplier
strides : strides
"""
# Apply the width multiplier to the number of feature maps
# Replace the ?? with the thinned calculation for the number of filters.
# HINT: multiply the number of filters by the thinning factor. Remember this will be a real number and the convolution layers require
# an integer, so you will need to cast the result.
filters = ??
# For a strided block, zero-pad the input and use 'valid' padding
if strides == (2, 2):
x = ZeroPadding2D(padding=((0, 1), (0, 1)))(x)
padding = 'valid'
else:
padding = 'same'
# Depthwise Convolution
x = DepthwiseConv2D((3, 3), strides, padding=padding)(x)
x = BatchNormalization()(x)
x = ReLU(6.0)(x)
# Pointwise Convolution
x = Conv2D(filters, (1, 1), strides=(1, 1), padding='same')(x)
x = BatchNormalization()(x)
x = ReLU(6.0)(x)
return x
def classifier(x, alpha, dropout, n_classes):
""" Construct the classifier group
x : input to the classifier
alpha : width multiplier
dropout : dropout percentage
n_classes : number of output classes
"""
# Flatten the feature maps into 1D feature maps (?, N)
x = GlobalAveragePooling2D()(x)
# Reshape the feature maps to (?, 1, 1, int(1024 * alpha))
shape = (1, 1, int(1024 * alpha))
x = Reshape(shape)(x)
# Perform dropout for preventing overfitting
x = Dropout(dropout)(x)
# Use convolution for classifying (emulates a fully connected layer)
x = Conv2D(n_classes, (1, 1), padding='same', activation='softmax')(x)
# Reshape the resulting output to 1D vector of number of classes
x = Reshape((n_classes, ))(x)
return x
# Meta-parameter: width multiplier (0 .. 1) for reducing number of filters.
# Replace the ?? with the thinning factor - let's start with no thinning
# HINT: one
alpha = ??
# Meta-parameter: dropout rate
dropout = 0.5
inputs = Input(shape=(224, 224, 3))
# The Stem Group
x = stem(inputs, alpha)
# The Learner
x = learner(x, alpha)
# The classifier for 1000 classes
outputs = classifier(x, alpha, dropout, 1000)
# Instantiate the Model
model = Model(inputs, outputs)
The end of the output should look like below.
_________________________________________________________________
re_lu_26 (ReLU) (None, 7, 7, 1024) 0
_________________________________________________________________
global_average_pooling2d (Gl (None, 1024) 0
_________________________________________________________________
reshape (Reshape) (None, 1, 1, 1024) 0
_________________________________________________________________
dropout (Dropout) (None, 1, 1, 1024) 0
_________________________________________________________________
conv2d_14 (Conv2D) (None, 1, 1, 1000) 1025000
_________________________________________________________________
reshape_1 (Reshape) (None, 1000) 0
=================================================================
Total params: 4,264,808
Trainable params: 4,242,920
Non-trainable params: 21,888
In [ ]:
model.summary()
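If you get stuck, here is one possible way to fill in the blanks. This is a sketch, not the only answer: it assumes the width multiplier parameter is named alpha throughout (as in stem()), uses the ReLU clip value of 6.0 from the MobileNet v1 paper, and casts thinned filter counts to integers since a fractional alpha produces a real number.
# stem(): thin the 32 stem filters and clip the activations at 6.0
x = Conv2D(int(32 * alpha), (3, 3), strides=(2, 2), padding='valid')(x)
x = ReLU(6.0)(x)
# learner(): same parameter name as in stem()
def learner(x, alpha):
# group(): pass the width multiplier through to the strided block
x = depthwise_block(x, n_filters, alpha, strides=(2, 2))
# depthwise_block(): thin the number of filters and cast to an integer
filters = int(n_filters * alpha)
# start with no thinning
alpha = 1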
Let's now code the mobile convolutional network style for a classifier, where a convolutional layer is used in place of a dense layer.
1. Set the number of filters in Conv2D to the number of classes.
2. Set the layer used for reducing and flattening the feature maps.
3. Set the final activation function for the class probability distribution.
In the summary, you should see 513K parameters.
In [ ]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D, Activation
def classifier(x, n_classes):
''' Construct the Classifier
x : input to the classifier
n_classes: number of output classes
'''
# Replace the ?? with the number of filters
# HINT: set the number of filters equal to number of classes
x = Conv2D(??, (1, 1), strides=1, activation='relu', padding='same')(x)
# reduce each filter (class) to a single value and flatten to a 1D vector
# Replace the ?? with the layer that does global average pooling and flattens into 1D vector
# HINT: the name of the layer is in the import from tensorflow.keras.layers
x = ??()(x)
# Replace the ?? with the activation function (string name) used for multi-classification
# HINT: it's the same as if we used a dense layer
outputs = Activation(??)(x)
return outputs
# let's pretend this is the final feature map size and number before the classifier
final_feature_maps = Input((4, 4, 512))
model = Model(final_feature_maps, classifier(final_feature_maps, 1000))
model.summary()
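One possible fill-in for the blanks (a sketch: the Conv2D filter count equals the number of classes, the pooling layer is the GlobalAveragePooling2D imported above, and softmax is the same activation a dense classifier would use):
x = Conv2D(n_classes, (1, 1), strides=1, activation='relu', padding='same')(x)
x = GlobalAveragePooling2D()(x)
outputs = Activation('softmax')(x)
Next, let's code a SqueezeNet fire block, which squeezes the feature maps with a 1x1 bottleneck convolution and then expands them with parallel 1x1 and 3x3 convolutions whose outputs are concatenated.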
In [ ]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, Concatenate
def fire_block(x, n_filters):
''' Construct a Fire Block
x : input to the block
n_filters: number of filters
'''
# squeeze layer
# Replace the ?? with a bottleneck filter size
# HINT: A bottleneck is a 1 by 1 filter that learns how to reduce the number of feature maps
squeeze = Conv2D(n_filters, (??, ??), strides=1, activation='relu', padding='same')(x)
# branch the squeeze layer into a 1x1 and 3x3 convolution and double the number of filters
# Replace the ??s with the input from the squeeze layer
# HINT: both convolutional layers have the same input
expand1x1 = Conv2D(n_filters * 4, (1, 1), strides=1, activation='relu', padding='same')(??)
expand3x3 = Conv2D(n_filters * 4, (3, 3), strides=1, activation='relu', padding='same')(??)
# concatenate the feature maps from the 1x1 and 3x3 branches
# Replace the ?? with the output from the 3x3 expand branch
# HINT: it's the branch with the 3x3 filter
x = Concatenate()([expand1x1, ??])
return x
# The input shape
inputs = Input((224, 224, 3))
outputs = fire_block(inputs, 16)
# Instantiate the Model
model = Model(inputs, outputs)
model.summary()
The output should look like below.
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) [(None, 224, 224, 3) 0
__________________________________________________________________________________________________
conv2d_3 (Conv2D) (None, 224, 224, 16) 64 input_3[0][0]
__________________________________________________________________________________________________
conv2d_4 (Conv2D) (None, 224, 224, 64) 1088 conv2d_3[0][0]
__________________________________________________________________________________________________
conv2d_5 (Conv2D) (None, 224, 224, 64) 9280 conv2d_3[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 224, 224, 128 0 conv2d_4[0][0]
conv2d_5[0][0]
==================================================================================================
Total params: 10,432
Trainable params: 10,432
Non-trainable params: 0
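If you need it, here is one possible solution sketch: the squeeze layer is a 1x1 bottleneck, both expand branches take the output of the squeeze layer as input, and the concatenation joins the 1x1 and 3x3 branches.
squeeze = Conv2D(n_filters, (1, 1), strides=1, activation='relu', padding='same')(x)
expand1x1 = Conv2D(n_filters * 4, (1, 1), strides=1, activation='relu', padding='same')(squeeze)
expand3x3 = Conv2D(n_filters * 4, (3, 3), strides=1, activation='relu', padding='same')(squeeze)
x = Concatenate()([expand1x1, expand3x3])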
In [12]:
from tensorflow.keras.applications import MobileNetV2
import cv2
import numpy as np
# Let's use a prebuilt MobileNet model trained on ImageNet
model = MobileNetV2(input_shape=(224, 224, 3), weights='imagenet')
# Let's make a prediction with the unquantized (large) version of the model
# We will use the image of an apple and preprocess it for the model
image = cv2.imread('apple.png')
image = cv2.resize(image, (224, 224))
image = (image / 255.0).astype(np.float32)
# now make the prediction
probabilities = model.predict(np.asarray([image]))
prediction = np.argmax(probabilities)
# Okay, it predicts the label associated with the value 948 (apple)
print("prediction", prediction)
In [19]:
import tensorflow as tf
# Create an instance of the converter for TF.Keras (keras format) model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Convert the model to the TFLite format
# Replace the ?? with the method that converts the Keras (large) model to a quantized TFLite model
# HINT: method is called convert.
tflite_model = converter.??()
# Instantiate an interpreter for the TFLite model
interpreter = tf.lite.Interpreter(model_content=tflite_model)
# Allocate the input and output tensors for the model
interpreter.allocate_tensors()
# Get input and output tensors details needed for prediction
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# pass the image as a batch to the input tensor
# Replace the ?? with the image of an apple as a batch of 1.
# HINT: look back at how we passed the image as a batch of 1 to the Keras model's predict()
interpreter.set_tensor(input_details[0]['index'], ??)
# Execute (invoke) the interpreter to perform the prediction
interpreter.invoke()
# Get the output from the model
softmax = interpreter.get_tensor(output_details[0]['index'])
# multi-class example, determine the label predicted from the softmax output
prediction = np.argmax(softmax)
print("prediction", prediction)